setwd("/mnt/picea/projects/aspseq/jfelten/T89-Laccaria-bicolor")
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(DESeq2))
suppressPackageStartupMessages(library(gplots))
suppressPackageStartupMessages(library(hyperSpec))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(VennDiagram))
suppressMessages(source("~/Git/UPSCb/src/R/plotMA.R"))
suppressMessages(source("~/Git/UPSCb/src/R/volcanoPlot.R"))
pal=brewer.pal(8,"Dark2")
hpal <- colorRampPalette(c("blue","white","red"))(100)
mar <- par("mar")
"line_plot" <- function(dds,vst,gene_id){
sel <- grepl(gene_id,rownames(vst))
stopifnot(sum(sel)==1)
return(
ggplot(bind_cols(as.data.frame(colData(dds)),
melt(vst[sel,])),
aes(x=Time,y=value,col=Experiment,group=Experiment)) +
geom_point() + geom_smooth() +
scale_y_continuous(name="VST expression") +
ggtitle(label=paste("Expression for: ",gene_id))
)
}
"extract_results" <- function(dds,vst,contrast,
padj=0.01,lfc=0.5,
plot=TRUE,verbose=TRUE,
export=TRUE,default_dir="analysis/DE",
default_prefix="DE-",
labels=colnames(dds),
sample_sel=1:ncol(dds)){
if(length(contrast)==1){
res <- results(dds,name=contrast)
} else {
res <- results(dds,contrast=contrast)
}
if(plot){
par(mar=c(5,5,5,5))
volcanoPlot(res)
par(mar=mar)
}
sel <- res$padj <= padj & abs(res$log2FoldChange) >= lfc & ! is.na(res$padj)
if(verbose){
message(sprintf("There are %s genes that are DE",sum(sel)))
}
if(export){
if(!dir.exists(default_dir)){
dir.create(default_dir,showWarnings=FALSE,recursive=TRUE,mode="0771")
}
write.csv(res,file=file.path(default_dir,paste0(default_prefix,"results.csv")))
write.csv(res[sel,],file.path(default_dir,paste0(default_prefix,"genes.csv")))
}
if(plot){
heatmap.2(t(scale(t(vst[sel,sample_sel]))),
distfun = pearson.dist,
hclustfun = function(X){hclust(X,method="ward.D2")},
trace="none",col=hpal,labRow = FALSE,
labCol=labels[sample_sel]
)
}
return(rownames(res[sel,]))
}
load("analysis/salmon/Lacbi-all-dds.rda")
vsd <- varianceStabilizingTransformation(dds,blind=FALSE)
vst <- assay(vsd)
vst <- vst - min(vst)
line_plot(dds,vst,"245379")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
line_plot(dds,vst,"676331")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
line_plot(dds,vst,"Lacbi1.eu2.Lbscf0069g00950")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
line_plot(dds,vst,"315258")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
plotDispEsts(dds)
The model used is:
Experiment * Time meaning that the Experiment and Time variable as well as their interaction Experiment:Time is considered. Because we cannot assume that these two variables explain all the variance in the data, there is also an Intercept for the linear model. This also implies that the model assumes ECM at 3 hours to be the baseline; i.e. everything is compared against it.
resultsNames(dds)
## [1] "Intercept" "Experiment_FLM_vs_ECM"
## [3] "Time_7_vs_3" "Time_14_vs_3"
## [5] "Time_21_vs_3" "Time_28_vs_3"
## [7] "ExperimentFLM.Time7" "ExperimentFLM.Time14"
## [9] "ExperimentFLM.Time21" "ExperimentFLM.Time28"
In the following we look at the interaction specific genes; i.e. genes that changes at a given time transition in between experiments ### FLM vs. ECM at T3
Lb_3 <- extract_results(dds,vst,"Experiment_FLM_vs_ECM",
default_prefix="Labic_FLM-vs-ECM_T3_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==3)
## Loading required package: LSD
## There are 2100 genes that are DE
Here we want to conmbine the effect of FLM-ECM at time T3 and the specific FLM:T7 interaction
Lb_7 <- extract_results(dds,vst,c(0,1,0,0,0,0,1,0,0,0),
default_prefix="Labic_FLM-vs-ECM_T7_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==7)
## There are 1222 genes that are DE
Lb_14 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,1,0,0),
default_prefix="Labic_FLM-vs-ECM_T14_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==14)
## There are 362 genes that are DE
Lb_21 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,0,1,0),
default_prefix="Labic_FLM-vs-ECM_T21_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==21)
## There are 381 genes that are DE
Lb_28 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,0,0,1),
default_prefix="Labic_FLM-vs-ECM_T28_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==28)
## There are 673 genes that are DE
grid.newpage()
grid.draw(venn.diagram(list(T3=Lb_3,
T7=Lb_7,
T14=Lb_14,
T21=Lb_21,
T28=Lb_28),
NULL,
fill=pal[1:5]))
load("analysis/salmon/Potra-104-127-139-removed-dds.rda")
vsd <- varianceStabilizingTransformation(dds,blind=FALSE)
vst <- assay(vsd)
vst <- vst - min(vst)
line_plot(dds,vst,"Potra000962g07909")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
line_plot(dds,vst,"Potra001661g13641")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
line_plot(dds,vst,"Potra003711g22520")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
line_plot(dds,vst,"Potra006413g25676")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
plotDispEsts(dds)
The model used is:
Experiment * Time meaning that the Experiment and Time variable as well as their interaction Experiment:Time is considered. Because we cannot assume that these two variables explain all the variance in the data, there is also an Intercept for the linear model. This also implies that the model assumes Cont at 3 hours to be the baseline; i.e. everything is compared against it.
resultsNames(dds)
## [1] "Intercept" "Experiment_ECM_vs_Cont"
## [3] "Time_7_vs_3" "Time_14_vs_3"
## [5] "Time_21_vs_3" "Time_28_vs_3"
## [7] "ExperimentECM.Time7" "ExperimentECM.Time14"
## [9] "ExperimentECM.Time21" "ExperimentECM.Time28"
In the following we look at the interaction specific genes; i.e. genes that changes at a given time transition in between experiments ### ECM vs. Cont at T3
Pa_3 <- extract_results(dds,vst,"Experiment_ECM_vs_Cont",
default_prefix="Potra_ECM-vs-Cont_T3_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==3)
## There are 230 genes that are DE
Pa_7 <- extract_results(dds,vst,c(0,1,0,0,0,0,1,0,0,0),
default_prefix="Potra_ECM-vs-Cont_T7_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==7)
## There are 205 genes that are DE
Pa_14 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,1,0,0),
default_prefix="Potra_ECM-vs-Cont_T14_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==14)
## There are 76 genes that are DE
Pa_21 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,0,1,0),
default_prefix="Potra_ECM-vs-Cont_T21_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==21)
## There are 1276 genes that are DE
Pa_28 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,0,0,1),
default_prefix="Potra_ECM-vs-Cont_T28_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==28)
## There are 416 genes that are DE
grid.newpage()
grid.draw(venn.diagram(list(T3=Pa_3,
T7=Pa_7,
T14=Pa_14,
T21=Pa_21,
T28=Pa_28),
NULL,
fill=pal[1:5]))
load("analysis/salmon/Potri-all-dds.rda")
vsd <- varianceStabilizingTransformation(dds,blind=FALSE)
vst <- assay(vsd)
vst <- vst - min(vst)
line_plot(dds,vst,"Potri.006G134800")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
line_plot(dds,vst,"Potri.006G221800")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
line_plot(dds,vst,"Potri.002G173900")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
line_plot(dds,vst,"Potri.004G088100")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
plotDispEsts(dds)
The model used is:
Experiment * Time meaning that the Experiment and Time variable as well as their interaction Experiment:Time is considered. Because we cannot assume that these two variables explain all the variance in the data, there is also an Intercept for the linear model. This also implies that the model assumes Cont at 3 hours to be the baseline; i.e. everything is compared against it.
resultsNames(dds)
## [1] "Intercept" "Experiment_ECM_vs_Cont"
## [3] "Time_7_vs_3" "Time_14_vs_3"
## [5] "Time_21_vs_3" "Time_28_vs_3"
## [7] "ExperimentECM.Time7" "ExperimentECM.Time14"
## [9] "ExperimentECM.Time21" "ExperimentECM.Time28"
In the following we look at the interaction specific genes; i.e. genes that changes at a given time transition in between experiments ### ECM vs. Cont at T3
Pa_3 <- extract_results(dds,vst,"Experiment_ECM_vs_Cont",
default_prefix="Potri_ECM-vs-Cont_T3_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==3)
## There are 311 genes that are DE
Pa_7 <- extract_results(dds,vst,c(0,1,0,0,0,0,1,0,0,0),
default_prefix="Potri_ECM-vs-Cont_T7_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==7)
## There are 219 genes that are DE
Pa_14 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,1,0,0),
default_prefix="Potri_ECM-vs-Cont_T14_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==14)
## There are 220 genes that are DE
Pa_21 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,0,1,0),
default_prefix="Potri_ECM-vs-Cont_T21_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==21)
## There are 1899 genes that are DE
Pa_28 <- extract_results(dds,vst,c(0,1,0,0,0,0,0,0,0,1),
default_prefix="Potri_ECM-vs-Cont_T28_",
labels=paste0(colData(dds)$Experiment,
colData(dds)$Time),
sample_sel=colData(dds)$Time==28)
## There are 355 genes that are DE
grid.newpage()
grid.draw(venn.diagram(list(T3=Pa_3,
T7=Pa_7,
T14=Pa_14,
T21=Pa_21,
T28=Pa_28),
NULL,
fill=pal[1:5]))
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.3 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] LSD_4.0-0 limma_3.40.6
## [3] VennDiagram_1.6.20 futile.logger_1.4.3
## [5] forcats_0.4.0 stringr_1.4.0
## [7] dplyr_0.8.3 purrr_0.3.2
## [9] readr_1.3.1 tidyr_0.8.3
## [11] tibble_2.1.3 tidyverse_1.2.1
## [13] RColorBrewer_1.1-2 hyperSpec_0.99-20180627
## [15] ggplot2_3.2.1 lattice_0.20-38
## [17] gplots_3.0.1.1 DESeq2_1.24.0
## [19] SummarizedExperiment_1.14.1 DelayedArray_0.10.0
## [21] BiocParallel_1.18.1 matrixStats_0.54.0
## [23] Biobase_2.44.0 GenomicRanges_1.36.0
## [25] GenomeInfoDb_1.20.0 IRanges_2.18.2
## [27] S4Vectors_0.22.0 BiocGenerics_0.30.0
## [29] data.table_1.12.2
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.4-1 htmlTable_1.13.1 XVector_0.24.0
## [4] base64enc_0.1-3 rstudioapi_0.10 bit64_0.9-7
## [7] AnnotationDbi_1.46.1 lubridate_1.7.4 xml2_1.2.2
## [10] splines_3.6.1 geneplotter_1.62.0 knitr_1.24
## [13] zeallot_0.1.0 Formula_1.2-3 jsonlite_1.6
## [16] broom_0.5.2 annotate_1.62.0 cluster_2.1.0
## [19] compiler_3.6.1 httr_1.4.1 backports_1.1.4
## [22] assertthat_0.2.1 Matrix_1.2-17 lazyeval_0.2.2
## [25] cli_1.1.0 formatR_1.7 acepack_1.4.1
## [28] htmltools_0.3.6 tools_3.6.1 gtable_0.3.0
## [31] glue_1.3.1 GenomeInfoDbData_1.2.1 reshape2_1.4.3
## [34] Rcpp_1.0.2 cellranger_1.1.0 vctrs_0.2.0
## [37] gdata_2.18.0 nlme_3.1-141 xfun_0.9
## [40] testthat_2.2.1 rvest_0.3.4 gtools_3.8.1
## [43] XML_3.98-1.20 zlibbioc_1.30.0 scales_1.0.0
## [46] hms_0.5.1 lambda.r_1.2.3 yaml_2.2.0
## [49] memoise_1.1.0 gridExtra_2.3 rpart_4.1-15
## [52] latticeExtra_0.6-28 stringi_1.4.3 RSQLite_2.1.2
## [55] highr_0.8 genefilter_1.66.0 checkmate_1.9.4
## [58] caTools_1.17.1.2 rlang_0.4.0 pkgconfig_2.0.2
## [61] bitops_1.0-6 evaluate_0.14 labeling_0.3
## [64] htmlwidgets_1.3 bit_1.1-14 tidyselect_0.2.5
## [67] plyr_1.8.4 magrittr_1.5 R6_2.4.0
## [70] generics_0.0.2 Hmisc_4.2-0 DBI_1.0.0
## [73] pillar_1.4.2 haven_2.1.1 foreign_0.8-72
## [76] withr_2.1.2 survival_2.44-1.1 RCurl_1.95-4.12
## [79] nnet_7.3-12 modelr_0.1.5 crayon_1.3.4
## [82] futile.options_1.0.1 KernSmooth_2.23-15 rmarkdown_1.15
## [85] locfit_1.5-9.1 readxl_1.3.1 blob_1.2.0
## [88] digest_0.6.20 xtable_1.8-4 munsell_0.5.0